{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.chat_models import ChatTongyi\n",
    "\n",
    "chat = ChatTongyi()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PydanticOutputParser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Joke(setup='为什么电脑永远不会感冒？', punchline='因为它有Windows（Windows，意为窗户，这里指电脑不会打开，所以不会受冷）')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.output_parsers import PydanticOutputParser\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "from langchain_core.pydantic_v1 import BaseModel, Field, validator\n",
    "\n",
    "\n",
    "# Define your desired data structure.\n",
    "class Joke(BaseModel):\n",
    "    setup: str = Field(description=\"开启一个笑话的问题\")\n",
    "    punchline: str = Field(description=\"解答笑话的答案\")\n",
    "\n",
    "    # You can add custom validation logic easily with Pydantic.\n",
    "    @validator(\"setup\")\n",
    "    def question_ends_with_question_mark(cls, field):\n",
    "        if field[-1] != \"？\":\n",
    "            raise ValueError(\"Badly formed question!\")\n",
    "        return field\n",
    "\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PydanticOutputParser(pydantic_object=Joke)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"根据用户的输入进行解答.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "# And a query intended to prompt a language model to populate the data structure.\n",
    "chain = prompt | chat | parser\n",
    "chain.invoke({\"query\": \"讲一个笑话\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"properties\": {\"setup\": {\"title\": \"Setup\", \"description\": \"\\\\u5f00\\\\u542f\\\\u4e00\\\\u4e2a\\\\u7b11\\\\u8bdd\\\\u7684\\\\u95ee\\\\u9898\", \"type\": \"string\"}, \"punchline\": {\"title\": \"Punchline\", \"description\": \"\\\\u89e3\\\\u7b54\\\\u7b11\\\\u8bdd\\\\u7684\\\\u7b54\\\\u6848\", \"type\": \"string\"}}, \"required\": [\"setup\", \"punchline\"]}\\n```'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parser.get_format_instructions()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Json解析器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'setup': '为什么电脑永远不会感冒？', 'punchline': '因为它有Windows（窗户）但是不开！'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = JsonOutputParser(pydantic_object=Joke)\n",
    "\n",
    "chain = prompt | chat | parser\n",
    "\n",
    "chain.invoke({\"query\": \"讲一个笑话\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"properties\": {\"setup\": {\"title\": \"Setup\", \"description\": \"\\\\u5f00\\\\u542f\\\\u4e00\\\\u4e2a\\\\u7b11\\\\u8bdd\\\\u7684\\\\u95ee\\\\u9898\", \"type\": \"string\"}, \"punchline\": {\"title\": \"Punchline\", \"description\": \"\\\\u89e3\\\\u7b54\\\\u7b11\\\\u8bdd\\\\u7684\\\\u7b54\\\\u6848\", \"type\": \"string\"}}, \"required\": [\"setup\", \"punchline\"]}\\n```'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parser.get_format_instructions()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 自定义解析器\n",
    "## Runnable Lambdas and Generators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'hELLO! hOW CAN i ASSIST YOU TODAY?'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from typing import Iterable\n",
    "\n",
    "from langchain_core.messages import AIMessage, AIMessageChunk\n",
    "\n",
    "def parse(ai_message: AIMessage) -> str:\n",
    "    \"\"\"Parse the AI message.\"\"\"\n",
    "    return ai_message.content.swapcase()\n",
    "\n",
    "\n",
    "chain = chat | parse\n",
    "chain.invoke(\"hello\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Runnable Generators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i| AM| A| LARGE LANGUAGE MODEL CREATED BY| aLIBABA cLOUD, DESIGNED TO ANSWER QUESTIONS AND| PROVIDE INFORMATION ON VARIOUS TOPICS.|"
     ]
    }
   ],
   "source": [
    "from langchain_core.runnables import RunnableGenerator\n",
    "\n",
    "\n",
    "def streaming_parse(chunks: Iterable[AIMessageChunk]) -> Iterable[str]:\n",
    "    for chunk in chunks:\n",
    "        yield chunk.content.swapcase()\n",
    "\n",
    "\n",
    "streaming_parse = RunnableGenerator(streaming_parse)\n",
    "\n",
    "chain = chat | streaming_parse\n",
    "\n",
    "for chunk in chain.stream(\"tell me about yourself in one sentence\"):\n",
    "    print(chunk, end=\"|\", flush=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 继承解析器基类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.exceptions import OutputParserException\n",
    "from langchain_core.output_parsers import BaseOutputParser\n",
    "\n",
    "\n",
    "# The [bool] desribes a parameterization of a generic.\n",
    "# It's basically indicating what the return type of parse is\n",
    "# in this case the return type is either True or False\n",
    "class BooleanOutputParser(BaseOutputParser[bool]):\n",
    "    \"\"\"Custom boolean parser.\"\"\"\n",
    "\n",
    "    true_val: str = \"YES\"\n",
    "    false_val: str = \"NO\"\n",
    "\n",
    "    def parse(self, text: str) -> bool:\n",
    "        cleaned_text = text.strip().upper()\n",
    "        if cleaned_text not in (self.true_val.upper(), self.false_val.upper()):\n",
    "            raise OutputParserException(\n",
    "                f\"BooleanOutputParser expected output value to either be \"\n",
    "                f\"{self.true_val} or {self.false_val} (case-insensitive). \"\n",
    "                f\"Received {cleaned_text}.\"\n",
    "            )\n",
    "        return cleaned_text == self.true_val.upper()\n",
    "\n",
    "    @property\n",
    "    def _type(self) -> str:\n",
    "        return \"boolean_output_parser\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parser = BooleanOutputParser()\n",
    "chain = chat | parser\n",
    "chain.invoke('回答我的问题，如果答案是\"是\"，直接回答\"Yes\"，如果答案是否，直接回答\"No\"。我的问题是：\"一周有7天吗\"')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 解析原始的LLM输出\n",
    "模型的输出其实经常包含一些额外信息`metadata`的，因此如果解析器需要这部分信息的话，可以使用下面的方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain_core.exceptions import OutputParserException\n",
    "from langchain_core.messages import AIMessage\n",
    "from langchain_core.output_parsers import BaseGenerationOutputParser\n",
    "from langchain_core.outputs import ChatGeneration, Generation\n",
    "\n",
    "\n",
    "class StrInvertCase(BaseGenerationOutputParser[str]):\n",
    "    \"\"\"An example parser that inverts the case of the characters in the message.\n",
    "\n",
    "    This is an example parse shown just for demonstration purposes and to keep\n",
    "    the example as simple as possible.\n",
    "    \"\"\"\n",
    "\n",
    "    def parse_result(self, result: List[Generation], *, partial: bool = False) -> str:\n",
    "        \"\"\"Parse a list of model Generations into a specific format.\n",
    "\n",
    "        Args:\n",
    "            result: A list of Generations to be parsed. The Generations are assumed\n",
    "                to be different candidate outputs for a single model input.\n",
    "                Many parsers assume that only a single generation is passed it in.\n",
    "                We will assert for that\n",
    "            partial: Whether to allow partial results. This is used for parsers\n",
    "                     that support streaming\n",
    "        \"\"\"\n",
    "        if len(result) != 1:\n",
    "            raise NotImplementedError(\n",
    "                \"This output parser can only be used with a single generation.\"\n",
    "            )\n",
    "        generation = result[0]\n",
    "        if not isinstance(generation, ChatGeneration):\n",
    "            # Say that this one only works with chat generations\n",
    "            raise OutputParserException(\n",
    "                \"This output parser can only be used with a chat generation.\"\n",
    "            )\n",
    "        return generation.message.content.swapcase()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = chat | StrInvertCase()\n",
    "chain.invoke('回答我的问题，如果答案是\"是\"，直接回答\"Yes\"，如果答案是否，直接回答\"No\"。我的问题是：\"一周有7天吗\"')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 类命名实体识别（NER）技术"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Joke(setup='为什么电脑永远不会感冒？', punchline='因为它有Windows（Windows，意为窗户，这里指电脑不会打开通风口）！')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define your desired data structure.\n",
    "class Joke(BaseModel):\n",
    "    setup: str = Field(description=\"开启一个笑话的问题\")\n",
    "    punchline: str = Field(description=\"解答笑话的答案\")\n",
    "\n",
    "    # You can add custom validation logic easily with Pydantic.\n",
    "    @validator(\"setup\")\n",
    "    # def question_ends_with_question_mark(cls, field):\n",
    "    def test(cls, field):\n",
    "        if field[-1] != \"？\":\n",
    "            raise ValueError(\"Badly formed question!\")\n",
    "        return field\n",
    "\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PydanticOutputParser(pydantic_object=Joke)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"根据用户的输入进行解答.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "# And a query intended to prompt a language model to populate the data structure.\n",
    "prompt_and_model = prompt | chat\n",
    "output = prompt_and_model.invoke({\"query\": \"讲一个笑话\"})\n",
    "parser.invoke(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.output_parsers import CommaSeparatedListOutputParser\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "output_parser = CommaSeparatedListOutputParser()\n",
    "\n",
    "format_instructions = output_parser.get_format_instructions()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
